# this script creates a network graph from ownership links
# previously extracted from registries (JSCReg and SReg)

# read the ownership-link snapshot (Stata file) for the date in search_date;
# search_date itself is not set in this part of the script
all_entities <-
  paste0("Ownership Data/all_entities_fin_", search_date, ".dta") %>%
  read_dta(encoding = "UTF-8")

# oligarch group list: distinct values of the group column,
# without the two "double ..." labels
group_list <- unique(all_entities$group)
group_list <- group_list[
  !(group_list %in% c("double SCM/ISD", "double ISD/Pryvat"))
]
# three labels get a form without spaces or hyphens
# (group_list is used below to address columns by name)
group_list <- replace(
  group_list, which(group_list == "Kyiv-Seven"), "KyivSeven"
)
group_list <- replace(
  group_list, which(group_list == "Finansy i Kredyt"), "FinansyiKredyt"
)
group_list <- replace(
  group_list, which(group_list == "Old Donetsk"), "OldDonetsk"
)

# back up dummies from Stata file
# for (g in group_list) {
#   dots <- setNames(paste0(g), paste0(g, "_dummy"))
#   all_entities <- all_entities %>%
#     mutate_(.dots = dots) 
# }

# assign group variables: set to 1 if a node has 
# a given group affiliation at least once in the data
# (this is for network graph plots) 
# for (g in group_list) {
#   dots <- setNames(paste0('max(', g, ')'), g)
#   all_entities <- all_entities %>% group_by(id) %>%
#     mutate_(.dots = dots) %>% ungroup()
# }

# replace every group column named in group_list by its maximum within id,
# so all rows of an id carry the same value for each group column
# all_of() marks group_list as an external character vector of column names
# (passing it bare to across() is deprecated in tidyselect and would be
# ambiguous if a column were ever called "group_list")
all_entities <- all_entities %>%
  group_by(id) %>%
  mutate(across(all_of(group_list), max)) %>%
  ungroup()

# recode some variables
# step 1: rename columns, transliterate the entity name, integer country code
all_entities <- all_entities %>%
  rename(entity_name = name, Delo_owner = owner) %>%
  mutate(
    entity_name_latin = stri_trans_general(entity_name, "Ukrainian-Latin/BGN"),
    country = as.integer(country)
  )

# step 2: unified ids
# a two-letter prefix is put in front of each numeric id:
#   first letter  - U (country code 804, Ukrainian) or F (any other code)
#   second letter - C (entity == 1, corporate) or I (entity == 0, individual)
# rows matching none of the four cases get a missing id
# this must run while entity is still the numeric 0/1 flag (recoded in step 3)
all_entities <- all_entities %>%
  mutate(
    id = case_when(
      country == 804 & entity == 1 ~ paste0("UC", id),
      country != 804 & entity == 1 ~ paste0("FC", id),
      country == 804 & entity == 0 ~ paste0("UI", id),
      country != 804 & entity == 0 ~ paste0("FI", id)
    ),
    # same construction for the prev_* side of each ownership link
    prev_id = case_when(
      prev_country == 804 & prev_entity == 1 ~ paste0("UC", prev_id),
      prev_country != 804 & prev_entity == 1 ~ paste0("FC", prev_id),
      prev_country == 804 & prev_entity == 0 ~ paste0("UI", prev_id),
      prev_country != 804 & prev_entity == 0 ~ paste0("FI", prev_id)
    )
  )

# step 3: node status as text, and the group of each oligarch
# (group_oligarch is used in a different script to calculate
# defensive ownership measures); names not listed stay missing
all_entities <- all_entities %>%
  mutate(
    entity = ifelse(entity == 1, "corporate", "individual"),
    group_oligarch = case_when(
      entity_name == "RINAT AKHMETOV" ~ "SCM",
      entity_name == "ARSEN AVAKOV" ~ "Basis",
      entity_name %in% c(
        "HENNADIY BOHOLYUBOV", "IHOR KOLOMOYSKYI", "OLEKSIY MARTYNOV"
      ) ~ "Pryvat",
      entity_name == "YEVHEN CHERVONENKO" ~ "Orlan",
      entity_name == "ANDRIY DERKACH" ~ "Derkach",
      entity_name == "OLEKSANDR FELDMAN" ~ "Feldman",
      entity_name == "ANATOLIY FRANCHUK" ~ "Franchuk",
      entity_name %in% c(
        "BOGDAN GUBSKY", "YURIY KARPENKO", "YURIY LYAKH",
        "VIKTOR MEDVEDCHUK", "HRYHORIY SURKIS", "IHOR SURKIS",
        "VALENTYN ZGHURSKYI"
      ) ~ "Kyiv-Seven",
      entity_name %in% c(
        "VITALIY HAIDUK", "OLEH MKRTCHIAN", "SERHIY TARUTA"
      ) ~ "ISD",
      entity_name == "OLEKSANDR KAHANOVSKYI" ~ "Kahanovskyi",
      entity_name == "VASYL KHMELNYTSKY" ~ "Khmelnytskyi",
      entity_name == "VALERIY KHOROSHKOVSKYI" ~ "UkrSotsBank",
      entity_name == "SERHIY KLYUYEV" ~ "Kliuev",
      entity_name == "OLEKSANDR LESHCHINSKYI" ~ "Ukrinterproduct",
      entity_name == "VIKTOR PINCHUK" ~ "Interpipe",
      entity_name == "PETRO POROSHENKO" ~ "UkrPromInvest",
      entity_name == "OLEKSANDR RYMARUK" ~ "Rymaruk",
      entity_name == "FEDIR SHPYG" ~ "Aval",
      entity_name == "VOLODYMYR SKUBENKO" ~ "Radon",
      entity_name == "MYKHAYLO TABACHNYK" ~ "Tabachnyk",
      entity_name == "SERHIY TIHIPKO" ~ "TAS",
      entity_name == "OLEKSANDR TRETYAKOV" ~ "Tretiakov",
      entity_name == "MYKOLA YANKOVSKY" ~ "Old Donetsk",
      entity_name == "OLEKSANDR YAROSLAVSKYI" ~ "UkrSybBank",
      entity_name == "PETRO YUSHCHENKO" ~ "Yushchenko",
      entity_name == "KOSTYANTIN ZHEVAGO" ~ "Finansy i Kredyt"
    )
  )

# oligarch dummy as factor
# `labels` is spelled out in full instead of relying on partial argument
# matching of `label`
# NOTE(review): no `levels` are given, so the two labels are attached to the
# sorted distinct values of oligarch; presumably a 0/1 dummy with both values
# present in the data - confirm
all_entities <- all_entities %>%
  mutate(oligarch = factor(oligarch, labels = c("not", "oligarch")))

# extract all vertices (firms and individuals):
# rows without an id are dropped, then one row is kept per
# (id, entity, country) - the first one after sorting by step
all.ids <- all_entities %>%
  filter(!is.na(id)) %>%
  arrange(id, entity, country, step) %>%
  group_by(id, entity, country) %>%
  slice_head(n = 1) %>%
  ungroup() %>%
  # text label for the vertex type; individuals are labelled regardless
  # of country, firms are split by country code 804 vs. any other code
  mutate(
    category = case_when(
      entity == "individual" ~ "Individual",
      entity == "corporate" & country == 804 ~ "Ukrainian firm",
      entity == "corporate" & country != 804 ~ "Foreign firm"
    )
  )

# fix group for Yushchenko and Leshchinskyi:
# overwrite the group label and set the matching group dummy to 1
# %in% is used for the row mask because it never returns NA, so rows with a
# missing entity_name cannot break the assignment; nothing is changed when
# the name does not occur in all.ids
is_yushchenko <- all.ids$entity_name %in% "PETRO YUSHCHENKO"
if (any(is_yushchenko)) {
  all.ids$group[is_yushchenko] <- "Yushchenko"
  all.ids$Yushchenko[is_yushchenko] <- 1
}

is_leshchinskyi <- all.ids$entity_name %in% "OLEKSANDR LESHCHINSKYI"
if (any(is_leshchinskyi)) {
  all.ids$group[is_leshchinskyi] <- "Ukrinterproduct"
  all.ids$Ukrinterproduct[is_leshchinskyi] <- 1
}

# create a list of edges from ownership links
# each edge is owner id + subsidiary id
# step is iteration in owner search in registries
# (step 0 means a firm is in the main set of Delo/UP firms)
# NOTE(review): owner is taken from id and sub from prev_id; confirm this
# matches the direction in which the registry search links id and prev_id
all.edges <- all_entities %>%
  # keep only links where both endpoints have an id
  filter(!is.na(id) & !is.na(prev_id)) %>%
  dplyr::select(owner = id, sub = prev_id, step) %>%
  # after sorting, distinct() keeps the lowest step for each owner/sub pair
  arrange(owner, sub, step) %>%
  distinct(owner, sub, .keep_all = TRUE)

# create directed network from edges
# graph_from_data_frame() is the current name of the deprecated
# graph.data.frame(); the first two columns of all.edges define each edge,
# remaining columns become edge attributes, and all.ids supplies the vertices
# NOTE(review): igraph reads vertex names from the first column of all.ids -
# presumably id; confirm against the column order of the input file
o.network.D <- graph_from_data_frame(
  all.edges,
  directed = TRUE,
  vertices = all.ids
)

# calculate some basic stats for nodes and store them as vertex attributes
# degree: igraph's default mode counts both incoming and outgoing edges
V(o.network.D)$degree <- igraph::degree(o.network.D)
# degree.out: outgoing edges only
V(o.network.D)$degree.out <- igraph::degree(o.network.D, mode = "out")
# betweenness ignoring edge direction; the cutoff on path length is set
# very high, presumably so that it never binds
V(o.network.D)$betweenness <- estimate_betweenness(
  o.network.D,
  directed = FALSE,
  cutoff = 1000000
)
